This notebook uses Clustergrammer2 to visualize the Cancer Cell Line Encyclopedia (CCLE) gene expression data (data obtained from the Broad Institute). The CCLE project measured genetic data from over 1000 cancer cell lines and provides cell line annotations (e.g. tissue) that are used to generate cell type categories.
from clustergrammer2 import net
import warnings
# Install a blanket 'ignore' filter: every warning raised after this point is
# suppressed (presumably to keep the notebook output uncluttered).
# NOTE(review): this also hides deprecation warnings from pandas/clustergrammer2.
warnings.filterwarnings('ignore')
import pandas as pd
from ast import literal_eval as make_tuple

# Read the gzip-compressed CCLE table; the first column of the file becomes
# the DataFrame index.
df = pd.read_csv(
    '../data/CCLE/CCLE.txt.gz',
    compression='gzip',
    index_col=0,
)

# Each column header is parsed with ast.literal_eval (aliased make_tuple), so
# the headers are expected to be Python literals written out as text
# (presumably tuples carrying the cell line name plus its categories).
cols = df.columns.tolist()
new_cols = list(map(make_tuple, cols))
df.columns = new_cols
# Heatmap 1: the 1000 rows with the highest variance, with no normalization.
net.load_df(df)
net.filter_N_top(N_top=1000, inst_rc='row', rank_type='var')

# Export the filtered matrix, round it to two decimals and load it back
# before rendering (presumably to keep the data sent to the widget small).
top_var_df = net.export_df().round(2)
net.load_df(top_var_df)
net.widget()
# Heatmap 2: start again from the full DataFrame, keep the 1000 rows with the
# highest variance, then Z-score along the row axis.
net.load_df(df)
net.filter_N_top(N_top=1000, inst_rc='row', rank_type='var')
net.normalize(norm_type='zscore', axis='row')

# Export the normalized matrix, round it to two decimals and load it back
# before rendering (presumably to keep the data sent to the widget small).
zscored_df = net.export_df().round(2)
net.load_df(zscored_df)
net.widget()